Yushi Tang
February 26/28, 2019
Wang, Z., Gerstein, M., & Snyder, M. (2009). RNA-Seq: a revolutionary tool for transcriptomics. Nature Reviews Genetics, 10(1), 57–63.
#!/bin/bash
#SBATCH -N 1 # Number of nodes
#SBATCH -n 10 # Number of cores
#SBATCH -t 180 # Runtime in minutes (0~10080)
#SBATCH -p general # Partition
#SBATCH --mem=50000 # Total memory (varies across nodes)
#SBATCH -o star_%j.out # Standard out goes to this file
#SBATCH -e star_%j.err # Standard err goes to this file
#SBATCH --mail-type=END # Email
#SBATCH --mail-user=YOUR_EMAIL

# STARalignment.sh: run a paired-end STAR alignment as a SLURM batch job.
# GENOME, FASTQ1, FASTQ2 and OUTDIR are not defined in this script; set them
# (or substitute real paths) before submitting.
module load gcc/4.8.2-fasrc01 STAR/2.5.0c-fasrc02

# Expansions are quoted so that paths containing spaces stay single arguments.
# --runThreadN matches the 10 cores requested with "#SBATCH -n 10" above.
STAR --genomeDir "$GENOME" \
--readFilesIn "$FASTQ1" "$FASTQ2" \
--outFileNamePrefix "$OUTDIR/" \
--outSAMprimaryFlag AllBestScore \
--outSAMtype BAM SortedByCoordinate \
--runThreadN 10 \
--alignEndsType EndToEnd

sbatch STARalignment.sh

Salmon tutorial 1
Salmon Tutorial
Salmon tutorial 2
Salmon tutorial 3
Salmon tutorial 4
Salmon tutorial 5
Salmon tutorial 6
Salmon tutorial 7
Salmon tutorial 8
Salmon tutorial 9
Salmon tutorial 10
Salmon tutorial 11
Enjoy your Salmon
#!/bin/bash
#SBATCH -N 1 # Number of nodes
#SBATCH -n 10 # Number of cores
#SBATCH -t 180 # Runtime in minutes (0~10080)
#SBATCH -p general # Partition
#SBATCH --mem=50000 # Total memory (varies across nodes)
#SBATCH -o salmon_%j.out # Standard out goes to this file
#SBATCH -e salmon_%j.err # Standard err goes to this file
#SBATCH --mail-type=END # Email
#SBATCH --mail-user=YOUR_EMAIL

# createSalmonIndex.sh: build a Salmon index as a SLURM batch job.
# TRANSCRIPTOME (input passed to -t) and INDEX (output passed to -i) are not
# defined in this script; set them before submitting.
module load salmon

# The command must end after "$INDEX": text fused onto the variable name would
# make the shell expand a different, undefined variable.
salmon index -t "$TRANSCRIPTOME" -i "$INDEX"

sbatch createSalmonIndex.sh

#!/bin/bash
#SBATCH -N 1 # Number of nodes
#SBATCH -n 10 # Number of cores
#SBATCH -t 180 # Runtime in minutes (0~10080)
#SBATCH -p general # Partition
#SBATCH --mem=50000 # Total memory (varies across nodes)
#SBATCH -o salmon_%j.out # Standard out goes to this file
#SBATCH -e salmon_%j.err # Standard err goes to this file
#SBATCH --mail-type=END # Email
#SBATCH --mail-user=YOUR_EMAIL

# Salmonalignment.sh: quantify one paired-end sample with Salmon as a SLURM
# batch job. INDEX, FASTQ and OUT are not defined in this script; set them
# before submitting.
module load salmon

# Every continuation backslash needs a space before it: the shell deletes
# "\<newline>" outright, so without the space the -1 file name would be glued
# to the following "-2".
# -p 10 matches the 10 cores requested with "#SBATCH -n 10" above.
salmon quant -i "$INDEX" \
-l A \
-1 "$FASTQ/ENCFF500PDO_sub.fastq" \
-2 "$FASTQ/ENCFF708KQE_sub.fastq" \
-o "$OUT" \
--numBootstraps 100 \
-p 10 \
--gcBias

sbatch Salmonalignment.sh

.sf file. Copy these to your local directory (e.g. scp or fileZilla) for downstream analysis (DESeq part)

DESeq2 packages. Install this via bioconductor.

# Install required packages
# Differential expression analysis of Salmon quantifications with DESeq2:
# install packages, import transcript-level estimates summarised to genes,
# fit the model, then draw an MA plot and a heatmap.

# One-time package installation.
# NOTE(review): biocLite() has been superseded by BiocManager::install() in
# current Bioconductor releases -- confirm which installer your R version needs.
source("https://bioconductor.org/biocLite.R")
biocLite("BiocUpgrade")
biocLite("DESeq2")
biocLite("tximport")
biocLite("EnsDb.Hsapiens.v86")
biocLite("EnsDb.Mmusculus.v79")
install.packages("rjson")

library(DESeq2)

# Quantification files in Data/. The pattern is anchored to the ".sf"
# extension so files that merely contain "sf" in their name are not picked up.
files <- list.files("Data", pattern = "\\.sf$")
condition <- c("4oh", "4oh", "4oh", "ctrl", "ctrl", "ctrl")
names <- c("4oh1", "4oh2", "4oh3", "ctrl1", "ctrl2", "ctrl3")
# The labels above are matched to files by position; fail loudly instead of
# letting data.frame() recycle a shorter file list.
# NOTE(review): this assumes the three "4oh" files are listed before the
# "ctrl" files -- confirm against the actual file names.
stopifnot(length(files) == length(condition))
sampleTable <- data.frame(
  sampleName = files,
  fileName = files,
  condition = condition
)

# Transcript-to-gene map used to summarise transcript estimates per gene.
library(EnsDb.Mmusculus.v79)
txdf <- transcripts(EnsDb.Mmusculus.v79, return.type = "DataFrame")
tx2gene <- as.data.frame(txdf[, c("tx_id", "gene_id")])

library(tximport)
txi <- tximport(
  file.path("Data", files),
  type = "salmon",
  ignoreTxVersion = TRUE,
  tx2gene = tx2gene
)
# NOTE(review): R orders factor levels alphabetically by default, so "4oh" is
# presumably the reference level and fold changes read as ctrl vs 4oh --
# confirm this is the intended direction.
dds <- DESeqDataSetFromTximport(txi, colData = sampleTable, design = ~condition)
# Drop genes whose total count across all samples is 0 or 1.
dds <- dds[rowSums(counts(dds)) > 1, ]
dds <- DESeq(dds)

# One cutoff shared by results() and the up/down filters below.
padj_cutoff <- 0.05
res <- results(dds, alpha = padj_cutoff)
res <- res[complete.cases(res), ]
res <- res[order(res$padj), ]
upR <- res[(res$padj < padj_cutoff) & (res$log2FoldChange > 0), ]
downR <- res[(res$padj < padj_cutoff) & (res$log2FoldChange < 0), ]
nrow(upR)
## [1] 382
nrow(downR)
## [1] 518
plotMA(res)

# Significant genes ordered by absolute log2 fold change, largest first.
absOrdered <- rbind(upR, downR)
absOrdered <- absOrdered[
  order(abs(absOrdered$log2FoldChange), decreasing = TRUE),
]
# Small offset keeps log2() finite for zero abundances.
mostvariable <- log2(txi$abundance[row.names(absOrdered), ] + .0001)
library(gplots)
# head() returns at most 100 rows, so this also works when fewer than 100
# genes pass the cutoff (indexing 1:100 would fail with "subscript out of
# bounds").
heatmap.2(head(mostvariable, 100), trace = "none", col = greenred(10))
#!/bin/bash
#SBATCH -N 1 # Number of nodes
#SBATCH -n 10 # Number of cores
#SBATCH -t 240 # Runtime in minutes (0~10080)
#SBATCH -p general,serial_requeue,shared # Partition
#SBATCH --mem=50000 # Total memory (varies across nodes)
#SBATCH -o salmon_%j.out # Standard out goes to this file
#SBATCH -e salmon_%j.err # Standard err goes to this file
#SBATCH --mail-type=END # Email
#SBATCH --mail-user=ytang@hsph.harvard.edu

# Build a Salmon index as a SLURM batch job. TRANSCRIPTOME (input passed to
# -t) and INDEX (output passed to -i) are not defined in this script; set them
# before submitting.
module load salmon

# The command must end after "$INDEX": text fused onto the variable name would
# make the shell expand a different, undefined variable.
salmon index -t "$TRANSCRIPTOME" -i "$INDEX"

Basic steps to access the cluster
Running jobs interactively: srun command
Rename path: mv command
Useful commands for data management
Inquire path on the Odyssey
Upload scripts to the Odyssey
View script list
View specific script
View specific script
Manage current jobs
Download the output